#ifndef __CUDA_RUNTIME_WRAPPER_H__
#define __CUDA_RUNTIME_WRAPPER_H__

#include "cuda_benchmark_targets.h"
#include "cuda_wrapper_internal_types.h"
#include "mc_runtime_types.h"

#ifdef __cplusplus
#include <climits>
#include <cstdlib>
#include <cstring>
#else
#include <stdlib.h>
#include <string.h>
#endif

#include <stdint.h>

#include "mc_reference_surface_types.h"
#include "mc_reference_texture_types.h"

/*
 * __dv(x): default-value helper used by the prototypes in this header.
 * Compiled as C++ it expands to "= x", so a parameter written as
 * "type name __dv(v)" receives v as its default argument. Compiled as C it
 * expands to nothing, because C has no default arguments.
 */
#ifdef __cplusplus
#define __dv(x) = x
#else
#define __dv(x)
#endif

/*
 * Globals that are only declared here; their definitions live elsewhere.
 * Both declarations appear before the extern "C" block is opened.
 * NOTE(review): judging by the names these hold the selected NV architecture
 * type and the wrapper's CUDA version - confirm against the defining file.
 */
extern enum wcudaNvArchType __nv_arch_type_internal__;
extern int __wcuda_version_internal__;

#ifdef __cplusplus
extern "C" {
#endif
/**
 *  @defgroup wcudaRuntime CUDA Runtime Wrapper API
 *  @{
 *  This section describes functions of CUDA Runtime Wrapper API.
 *
 */

/**
 *  @defgroup Device Device Management
 *  @{
 *  This section describes the Device Management functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Device management prototypes. Every function returns an mcError_t status.
 * NOTE(review): the names follow the CUDA runtime cudaXxx device functions
 * with a "w" prefix; the behavior lives in the implementation, not in this
 * header - confirm semantics there. Non-const pointer parameters are
 * presumably outputs.
 */
/* Get/Set pairs for the device and the device flags, plus the device count. */
mcError_t wcudaGetDevice(int *device);
mcError_t wcudaSetDevice(int device);
mcError_t wcudaGetDeviceFlags(unsigned int *flags);
mcError_t wcudaSetDeviceFlags(unsigned int flags);
mcError_t wcudaGetDeviceCount(int *count);

/* PCI bus id lookups, shared-memory config, stream priority range, synchronize. */
mcError_t wcudaDeviceGetByPCIBusId(int *device, const char *pciBusId);
mcError_t wcudaDeviceGetPCIBusId(char *pciBusId, int len, int device);
mcError_t wcudaDeviceGetSharedMemConfig(enum mcSharedMemConfig *pConfig);
mcError_t wcudaDeviceSetSharedMemConfig(enum mcSharedMemConfig config);
mcError_t wcudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority);
mcError_t wcudaDeviceSynchronize(void);

/* Device choice, memory pools, cache config, limits, reset, valid-device list. */
mcError_t wcudaChooseDevice(int *device, const mcDeviceProp_t *prop);
mcError_t wcudaDeviceGetDefaultMemPool(mcMemPool_t *memPool, int device);
mcError_t wcudaDeviceGetMemPool(mcMemPool_t *memPool, int device);
mcError_t wcudaDeviceSetMemPool(int device, mcMemPool_t memPool);
mcError_t wcudaDeviceGetCacheConfig(enum mcFuncCache_t *pCacheConfig);
mcError_t wcudaDeviceSetCacheConfig(enum mcFuncCache_t cacheConfig);
mcError_t wcudaDeviceGetLimit(size_t *pValue, enum mcLimit_t limit);
mcError_t wcudaDeviceSetLimit(enum mcLimit_t limit, size_t value);
mcError_t wcudaDeviceReset(void);
mcError_t wcudaSetValidDevices(int *deviceArr, int len);
/* P2P attribute between two devices, GPUDirect RDMA write flush, NvSciSync attributes. */
mcError_t wcudaDeviceGetP2PAttribute(int *value, enum mcDeviceP2PAttr attr, int srcDevice,
                                     int dstDevice);
mcError_t wcudaDeviceFlushGPUDirectRDMAWrites(enum mcFlushGPUDirectRDMAWritesTarget target,
                                              enum mcFlushGPUDirectRDMAWritesScope scope);
mcError_t wcudaDeviceGetNvSciSyncAttributes(void *mcSciSyncAttrList, int device, int flags);
/* Whole property structure, single attribute, and 1D linear texture maximum width. */
mcError_t wcudaGetDeviceProperties(mcDeviceProp_t *prop, int device);
mcError_t wcudaDeviceGetAttribute(int *value, enum mcDeviceAttribute_t attr, int device);
mcError_t wcudaDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements,
                                                const mcChannelFormatDesc *fmtDesc, int device);
/**
 * @} Device
 */

/**
 *  @defgroup Thread Thread Management
 *  @{
 *  This section describes Thread Management functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Thread-prefixed prototypes. The limit and cache-config signatures are
 * identical to the wcudaDeviceGet/SetLimit and wcudaDeviceGet/SetCacheConfig
 * prototypes declared in the Device group.
 * NOTE(review): presumably legacy aliases of the Device functions - confirm
 * against the implementation.
 */
mcError_t wcudaThreadGetLimit(size_t *pValue, enum mcLimit_t limit);
mcError_t wcudaThreadSetLimit(enum mcLimit_t limit, size_t value);
mcError_t wcudaThreadGetCacheConfig(enum mcFuncCache_t *cacheConfig);
mcError_t wcudaThreadSetCacheConfig(enum mcFuncCache_t cacheConfig);
mcError_t wcudaThreadSynchronize(void);
mcError_t wcudaThreadExit(void);
/**
 * @} Thread Management
 */

/**
 *  @defgroup Error Error Handling
 *  @{
 *  This section describes the Error Handling functions of CUDA Runtime Wrapper API.
 *
 */

/*
 * Error query prototypes. The two no-argument functions return an mcError_t;
 * the other two map an mcError_t to a const C string.
 * NOTE(review): whether wcudaGetLastError clears the stored error while
 * wcudaPeekAtLastError does not (as in the CUDA runtime) is not visible here -
 * confirm against the implementation.
 */
mcError_t wcudaGetLastError(void);
const char *wcudaGetErrorName(mcError_t mc_error);
const char *wcudaGetErrorString(mcError_t mc_error);
mcError_t wcudaPeekAtLastError(void);

/**
 * @} Error
 */

/**
 *  @defgroup Stream Stream Management
 *  @{
 *  This section describes the Stream Management functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Stream management prototypes. Every function returns an mcError_t status.
 * Parameters tagged with __dv(v) take v as their default argument when this
 * header is compiled as C++; in C every argument must be passed explicitly.
 */
/* Creation (plain, with flags, with flags and priority) and destruction. */
mcError_t wcudaStreamCreate(mcStream_t *pStream);
mcError_t wcudaStreamCreateWithFlags(mcStream_t *pStream, unsigned int flags);
mcError_t wcudaStreamCreateWithPriority(mcStream_t *pStream, unsigned int flags, int priority);
mcError_t wcudaStreamDestroy(mcStream_t stream);
/* Flag/priority getters, query, synchronize, event wait and callback registration. */
mcError_t wcudaStreamGetFlags(mcStream_t stream, unsigned int *flags);
mcError_t wcudaStreamGetPriority(mcStream_t stream, int *priority);
mcError_t wcudaStreamQuery(mcStream_t stream);
mcError_t wcudaStreamSynchronize(mcStream_t stream);
mcError_t wcudaStreamWaitEvent(mcStream_t stream, mcEvent_t event, unsigned int flags __dv(0));
mcError_t wcudaStreamAddCallback(mcStream_t stream, mcStreamCallback_t callback, void *userData,
                                 unsigned int flags);
mcError_t wcudaCtxResetPersistingL2Cache(void);
/* In C++, length defaults to 0 and flags to mcMemAttachSingle. */
mcError_t wcudaStreamAttachMemAsync(mcStream_t stream, void *devPtr, size_t length __dv(0),
                                    unsigned int flags __dv(mcMemAttachSingle));
/* Stream attributes: copy between streams, set from a const value, get into value_out. */
mcError_t wcudaStreamCopyAttributes(mcStream_t dst, mcStream_t src);
mcError_t wcudaStreamSetAttribute(mcStream_t hStream, mcStreamAttrID attr,
                                  const mcStreamAttrValue *value);
mcError_t wcudaStreamGetAttribute(mcStream_t hStream, mcStreamAttrID attr,
                                  mcStreamAttrValue *value_out);
/* Stream capture: begin/end, status query, capture info and dependency update. */
mcError_t wcudaStreamBeginCapture(mcStream_t stream, mcStreamCaptureMode mode);
mcError_t wcudaStreamEndCapture(mcStream_t stream, mcGraph_t *graph);
mcError_t wcudaStreamIsCapturing(mcStream_t stream, mcStreamCaptureStatus *status);
mcError_t wcudaStreamGetCaptureInfo(mcStream_t stream, mcStreamCaptureStatus *status,
                                    unsigned long long *id_out);
/* The _v2 variant adds graph/dependency outputs; in C++ all four trailing pointers default to 0. */
mcError_t wcudaStreamGetCaptureInfo_v2(mcStream_t stream, mcStreamCaptureStatus *status,
                                       unsigned long long *id_out __dv(0),
                                       mcGraph_t *graph_out __dv(0),
                                       const mcGraphNode_t **dependencies_out __dv(0),
                                       size_t *numDependencies_out __dv(0));
mcError_t wcudaStreamUpdateCaptureDependencies(mcStream_t stream, mcGraphNode_t *dependencies,
                                               size_t numDependencies, unsigned int flags __dv(0));
/* Takes a non-const mode pointer; presumably in/out - confirm against the implementation. */
mcError_t wcudaThreadExchangeStreamCaptureMode(mcStreamCaptureMode *mode);
/**
 * @} Stream
 */

/**
 *  @defgroup Event Event Management
 *  @{
 *  This section describes the Event Management functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Event management prototypes. Every function returns an mcError_t status.
 */
/* IPC event handles: obtain a handle for an event, and open an event from a handle. */
mcError_t wcudaIpcGetEventHandle(mcIpcEventHandle_t *handle, mcEvent_t event);
mcError_t wcudaIpcOpenEventHandle(mcEvent_t *event, mcIpcEventHandle_t handle);
/* Creation (plain / with flags) and destruction. */
mcError_t wcudaEventCreate(mcEvent_t *event);
mcError_t wcudaEventCreateWithFlags(mcEvent_t *event, unsigned int flags);
mcError_t wcudaEventDestroy(mcEvent_t event);
/* Writes a float through ms for a start/end event pair; the unit is presumably milliseconds. */
mcError_t wcudaEventElapsedTime(float *ms, mcEvent_t start, mcEvent_t end);
mcError_t wcudaEventQuery(mcEvent_t event);
/* In C++ the stream (and flags, where present) default to 0. */
mcError_t wcudaEventRecord(mcEvent_t event, mcStream_t stream __dv(0));
mcError_t wcudaEventRecordWithFlags(mcEvent_t event, mcStream_t stream __dv(0),
                                    unsigned int flags __dv(0));
mcError_t wcudaEventSynchronize(mcEvent_t event);

/**
 * @} Event
 */

/**
 *  @defgroup Execution Execution Control
 *  @{
 *  This section describes the Execution Control functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Execution control prototypes. Functions are identified by an untyped
 * const void * (func / hostFunction); every prototype returns an mcError_t.
 */
mcError_t wcudaFuncGetAttributes(struct mcFuncAttributes *attr, const void *func);
/* Launch with explicit grid/block dimensions, argument pointer array, shared-memory size, stream. */
mcError_t wcudaLaunchKernel(const void *hostFunction, dim3 gridDim, dim3 blockDim, void **args,
                            size_t sharedMemBytes, mcStream_t stream);
mcError_t wcudaFuncSetAttribute(const void *func, mcFuncAttribute attr, int value);
mcError_t wcudaFuncSetCacheConfig(const void *func, enum mcFuncCache_t cacheConfig);
mcError_t wcudaFuncSetSharedMemConfig(const void *func, enum mcSharedMemConfig config);
/* Same parameter list shape as wcudaLaunchKernel. */
mcError_t wcudaLaunchCooperativeKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args,
                                       size_t sharedMem, mcStream_t stream);
mcError_t wcudaLaunchHostFunc(mcStream_t stream, mcHostFn_t fn, void *userData);
/* Takes a list of per-device launch parameters; flags defaults to 0 in C++. */
mcError_t wcudaLaunchCooperativeKernelMultiDevice(mcLaunchParams *launchParamsList,
                                                  unsigned int numDevices, unsigned flags __dv(0));

/* TODO:wcuda_skipped - deprecated as of CUDA 7.5 */
mcError_t wcudaSetDoubleForDevice(double *d);
/* TODO:wcuda_skipped - deprecated as of CUDA 7.5 */
mcError_t wcudaSetDoubleForHost(double *d);

/**
 * @} Execution
 */

/**
 *  @defgroup Occupancy Occupancy
 *  @{
 *  This section describes the Occupancy functions of CUDA Runtime Wrapper API.
 *
 */

/*
 * Occupancy prototypes. The kernel is passed as an untyped const void *func
 * and results are written through the leading pointer parameters.
 * Every prototype returns an mcError_t status.
 */
mcError_t wcudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, const void *func,
                                                     int numBlocks, int blockSize);
/* In C++ dynamicSMemSize and blockSizeLimit default to 0. */
mcError_t wcudaOccupancyMaxPotentialBlockSize(int *minGridSize, int *blockSize, const void *func,
                                              size_t dynamicSMemSize __dv(0),
                                              int blockSizeLimit __dv(0));
/* Same as above with an extra flags argument, which also defaults to 0 in C++. */
mcError_t wcudaOccupancyMaxPotentialBlockSizeWithFlags(int *minGridSize, int *blockSize,
                                                       const void *func,
                                                       size_t dynamicSMemSize __dv(0),
                                                       int blockSizeLimit __dv(0),
                                                       unsigned int flags __dv(0));
/* No defaults here: block size and dynamic shared-memory size must always be passed. */
mcError_t wcudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func,
                                                         int blockSize, size_t dynamicSMemSize);
mcError_t wcudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func,
                                                                  int blockSize,
                                                                  size_t dynamicSMemSize,
                                                                  unsigned int flags);
/**
 * @} Occupancy
 */

/**
 *  @defgroup Memory Memory Management
 *  @{
 *  This section describes the Memory Management functions of CUDA Runtime Wrapper API.
 *
 */

/*
 * Allocation and copy prototypes. Every function returns an mcError_t status.
 * NOTE(review): the copy direction is spelled both "mcMemcpyKind" and
 * "enum _mcMemcpyKind" across this header; presumably the former is a typedef
 * of the latter - confirm in the runtime type headers.
 */
/* Allocators: each writes the new pointer/array handle through its first parameter. */
mcError_t wcudaMalloc(void **ptr, size_t sizeBytes);
mcError_t wcudaMallocHost(void **ptr, size_t size);
mcError_t wcudaMalloc3D(struct mcPitchedPtr *pitchedDevPtr, struct mcExtent extent);
/* In C++ height and flags default to 0. */
mcError_t wcudaMallocArray(mcArray_t *array, const mcChannelFormatDesc *desc, size_t width,
                           size_t height __dv(0), unsigned int flags __dv(0));
mcError_t wcudaMalloc3DArray(mcArray_t *array, const mcChannelFormatDesc *desc,
                             struct mcExtent extent, unsigned int flags __dv(0));
mcError_t wcudaMallocMipmappedArray(mcMipmappedArray_t *mipmappedArray,
                                    const mcChannelFormatDesc *desc, struct mcExtent extent,
                                    unsigned int numLevels, unsigned int flags __dv(0));
/* Copies: dst first, src second; asynchronous variants take a stream that defaults to 0 in C++. */
mcError_t wcudaMemcpy(void *dst, const void *src, size_t sizeBytes, mcMemcpyKind kind);
mcError_t wcudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width,
                             size_t height, enum _mcMemcpyKind kind, mcStream_t stream __dv(0));
mcError_t wcudaMemcpy2DToArrayAsync(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                                    size_t spitch, size_t width, size_t height, mcMemcpyKind kind,
                                    mcStream_t stream __dv(0));
mcError_t wcudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, mcArray_const_t src, size_t wOffset,
                                      size_t hOffset, size_t width, size_t height,
                                      mcMemcpyKind kind, mcStream_t stream __dv(0));
mcError_t wcudaMemcpyAsync(void *dst, const void *src, size_t count, enum _mcMemcpyKind kind,
                           mcStream_t stream __dv(0));

/*
 * Array copy prototypes that the existing TODO markers tag as deprecated.
 * They take a linear byte count rather than a width/height pair.
 * NOTE(review): "wcuda_skipped" presumably means the wrapper does not forward
 * these calls - confirm against the implementation.
 */
/* TODO:wcuda_skipped - deprecated */
mcError_t wcudaMemcpyToArray(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                             size_t count, enum _mcMemcpyKind kind);
/* TODO:wcuda_skipped - deprecated */
mcError_t wcudaMemcpyFromArray(void *dst, mcArray_const_t src, size_t wOffset, size_t hOffset,
                               size_t count, mcMemcpyKind kind);
/* TODO:wcuda_skipped - deprecated */
/* In C++ kind defaults to mcMemcpyDeviceToDevice. */
mcError_t wcudaMemcpyArrayToArray(mcArray_t dst, size_t wOffsetDst, size_t hOffsetDst,
                                  mcArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc,
                                  size_t count, mcMemcpyKind kind __dv(mcMemcpyDeviceToDevice));
/* TODO:wcuda_skipped - deprecated */
mcError_t wcudaMemcpyToArrayAsync(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                                  size_t count, mcMemcpyKind kind, mcStream_t stream __dv(0));
/* TODO:wcuda_skipped - deprecated */
mcError_t wcudaMemcpyFromArrayAsync(void *dst, mcArray_const_t src, size_t wOffset, size_t hOffset,
                                    size_t count, mcMemcpyKind kind, mcStream_t stream __dv(0));
/*
 * Synchronous 2D array copies: offsets are given as (wOffset, hOffset) and the
 * copied region as (width, height).
 */
mcError_t wcudaMemcpy2DToArray(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                               size_t spitch, size_t width, size_t height, enum _mcMemcpyKind kind);
mcError_t wcudaMemcpy2DFromArray(void *dst, size_t dpitch, mcArray_const_t src, size_t wOffset,
                                 size_t hOffset, size_t width, size_t height, mcMemcpyKind kind);
/*
 * NOTE(review): unlike the other mcError_t prototypes visible in this header,
 * the next declaration is named cudaMemcpy2DArrayToArray, without the "w"
 * prefix. This looks like a typo for wcudaMemcpy2DArrayToArray - confirm which
 * symbol the implementation exports before renaming.
 * In C++ kind defaults to mcMemcpyDeviceToDevice.
 */
mcError_t cudaMemcpy2DArrayToArray(mcArray_t dst, size_t wOffsetDst, size_t hOffsetDst,
                                   mcArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc,
                                   size_t width, size_t height,
                                   mcMemcpyKind kind __dv(mcMemcpyDeviceToDevice));
/* Release prototypes for device pointers, arrays, mipmapped arrays and host pointers. */
mcError_t wcudaFree(void *ptr);

mcError_t wcudaFreeArray(mcArray_t array);
mcError_t wcudaFreeMipmappedArray(mcMipmappedArray_t mipmappedArray);
mcError_t wcudaFreeHost(void *ptr);
/* Host memory: device-pointer lookup, flagged allocation, register/unregister of a host range. */
mcError_t wcudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags);
mcError_t wcudaHostAlloc(void **pHost, size_t size, unsigned int flags);
mcError_t wcudaHostRegister(void *ptr, size_t size, unsigned int flags);
mcError_t wcudaHostUnregister(void *ptr);
/* Queries: results are written through the leading pointer parameters. */
mcError_t wcudaMemGetInfo(size_t *free, size_t *total);
mcError_t wcudaArrayGetInfo(mcChannelFormatDesc *desc, struct mcExtent *extent, unsigned int *flags,
                            mcArray_t array);
mcError_t wcudaArrayGetPlane(mcArray_t *pPlaneArray, mcArray_t hArray, unsigned int planeIdx);
mcError_t wcudaArrayGetMemoryRequirements(mcArrayMemoryRequirements *memoryRequirements,
                                          mcArray_t array, int device);
mcError_t wcudaMipmappedArrayGetMemoryRequirements(mcArrayMemoryRequirements *memoryRequirements,
                                                   mcMipmappedArray_t mipmap, int device);
mcError_t wcudaArrayGetSparseProperties(mcArraySparseProperties *sparseProperties, mcArray_t array);
mcError_t wcudaMipmappedArrayGetSparseProperties(mcArraySparseProperties *sparseProperties,
                                                 mcMipmappedArray_t mipmap);
/* Linear memset; the asynchronous variant's stream defaults to 0 in C++. */
mcError_t wcudaMemset(void *devPtr, int value, size_t count);
mcError_t wcudaMemsetAsync(void *devPtr, int value, size_t count, mcStream_t stream __dv(0));
/* Symbol lookups: the symbol is passed as an untyped const void *. */
mcError_t wcudaGetSymbolAddress(void **devPtr, const void *symbol);
mcError_t wcudaGetSymbolSize(size_t *size, const void *symbol);
mcError_t wcudaHostGetFlags(unsigned int *pFlags, void *pHost);
/* Pitched allocation writes both the pointer and the pitch; 2D copy takes a pitch per side. */
mcError_t wcudaMallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height);
mcError_t wcudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width,
                        size_t height, enum _mcMemcpyKind kind);
/*
 * 3D copies described entirely by a const parameter structure; every
 * prototype returns an mcError_t status.
 * The stream argument of the two asynchronous variants defaults to 0 when
 * compiled as C++ (via __dv), consistent with wcudaMemcpyAsync,
 * wcudaMemcpy2DAsync and wcudaMemcpyPeerAsync in this header and with the
 * cudaMemcpy3DAsync / cudaMemcpy3DPeerAsync prototypes of the CUDA runtime.
 * Existing callers that pass the stream explicitly are unaffected, and C
 * callers see no change because __dv expands to nothing in C.
 */
mcError_t wcudaMemcpy3D(const struct mcMemcpy3DParms *p);
mcError_t wcudaMemcpy3DAsync(const struct mcMemcpy3DParms *p, mcStream_t stream __dv(0));
mcError_t wcudaMemcpy3DPeer(const mcMemcpy3DPeerParms *p);
mcError_t wcudaMemcpy3DPeerAsync(const mcMemcpy3DPeerParms *p, mcStream_t stream __dv(0));
/*
 * Symbol copies, peer copies and 2D memset. Every function returns an
 * mcError_t status; asynchronous variants take a stream that defaults to 0
 * in C++.
 */
/* In C++ offset defaults to 0 and kind to mcMemcpyDeviceToHost. */
mcError_t wcudaMemcpyFromSymbol(void *dst, const void *symbol, size_t count, size_t offset __dv(0),
                                enum _mcMemcpyKind kind __dv(mcMemcpyDeviceToHost));
/* The asynchronous form has no defaults for offset and kind; only the stream is optional. */
mcError_t wcudaMemcpyFromSymbolAsync(void *dst, const void *symbol, size_t count, size_t offset,
                                     enum _mcMemcpyKind kind, mcStream_t stream __dv(0));
/* Peer copies name the destination and source devices explicitly. */
mcError_t wcudaMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t count);
mcError_t wcudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice,
                               size_t count, mcStream_t stream __dv(0));
/* In C++ offset defaults to 0 and kind to mcMemcpyHostToDevice. */
mcError_t wcudaMemcpyToSymbol(const void *symbol, const void *src, size_t count,
                              size_t offset __dv(0),
                              enum _mcMemcpyKind kind __dv(mcMemcpyHostToDevice));
mcError_t wcudaMemcpyToSymbolAsync(const void *symbol, const void *src, size_t count, size_t offset,
                                   enum _mcMemcpyKind kind, mcStream_t stream __dv(0));
mcError_t wcudaMemset2D(void *devPtr, size_t pitch, int value, size_t width, size_t height);
mcError_t wcudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height,
                             mcStream_t stream __dv(0));
/*
 * 3D memset over a pitched pointer and an extent, both passed by value;
 * each prototype returns an mcError_t status.
 * The stream argument of the asynchronous variant defaults to 0 when compiled
 * as C++ (via __dv), consistent with wcudaMemsetAsync and wcudaMemset2DAsync
 * in this header and with cudaMemset3DAsync in the CUDA runtime. Callers that
 * pass the stream explicitly, and all C callers, are unaffected.
 */
mcError_t wcudaMemset3D(struct mcPitchedPtr pitchedDevPtr, int value, struct mcExtent extent);
mcError_t wcudaMemset3DAsync(struct mcPitchedPtr pitchedDevPtr, int value, struct mcExtent extent,
                             mcStream_t stream __dv(0));
/*
 * Value constructors: each returns its structure by value rather than an
 * mcError_t status.
 * NOTE(review): presumably w/h/d, d/p/xsz/ysz and x/y/z are copied into the
 * corresponding structure fields - confirm against the definitions.
 */
struct mcExtent make_wcudaExtent(size_t w, size_t h, size_t d);
struct mcPitchedPtr make_wcudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz);
mcPos make_wcudaPos(size_t x, size_t y, size_t z);
/*
 * IPC memory handles: obtain a handle for a device pointer, open a device
 * pointer from a handle (passed by value) with flags, and close it again.
 */
mcError_t wcudaIpcGetMemHandle(mcIpcMemHandle_t *handle, void *devPtr);
mcError_t wcudaIpcOpenMemHandle(void **devPtr, mcIpcMemHandle_t handle, unsigned int flags);
mcError_t wcudaIpcCloseMemHandle(void *devPtr);

/**
 * @} Memory
 */

/**
 *  @defgroup SOMA Stream Ordered Memory Allocator
 *  @{
 *  This section describes the Stream Ordered Memory Allocator functions of CUDA Runtime Wrapper
 * API.
 *
 */
/*
 * Stream-ordered allocation and memory pool prototypes. Every function
 * returns an mcError_t status. None of the stream parameters here carries a
 * __dv default, so the stream must always be passed.
 */
mcError_t wcudaMallocAsync(void **devPtr, size_t size, mcStream_t hStream);
mcError_t wcudaMallocFromPoolAsync(void **ptr, size_t size, mcMemPool_t memPool, mcStream_t stream);
mcError_t wcudaFreeAsync(void *devPtr, mcStream_t hStream);
/* Pool lifetime. */
mcError_t wcudaMemPoolCreate(mcMemPool_t *memPool, const struct mcMemPoolProps *poolProps);
mcError_t wcudaMemPoolDestroy(mcMemPool_t memPool);
/* Export side: pointer export data and shareable handle (untyped void *). */
mcError_t wcudaMemPoolExportPointer(struct mcMemPoolPtrExportData *exportData, void *ptr);
mcError_t wcudaMemPoolExportToShareableHandle(void *shareableHandle, mcMemPool_t memPool,
                                              enum mcMemAllocationHandleType handleType,
                                              unsigned int flags);
mcError_t wcudaMemPoolGetAccess(enum mcMemAccessFlags *flags, mcMemPool_t memPool,
                                struct mcMemLocation *location);
/* Attribute values are exchanged through an untyped void *; the pointee type depends on attr. */
mcError_t wcudaMemPoolGetAttribute(mcMemPool_t memPool, enum mcMemPoolAttr attr, void *value);
/* Import side: counterpart prototypes of the two export functions above. */
mcError_t wcudaMemPoolImportFromShareableHandle(mcMemPool_t *memPool, void *shareableHandle,
                                                enum mcMemAllocationHandleType handleType,
                                                unsigned int flags);
mcError_t wcudaMemPoolImportPointer(void **ptr, mcMemPool_t memPool,
                                    struct mcMemPoolPtrExportData *exportData);
/* descList is a const array of count access descriptors. */
mcError_t wcudaMemPoolSetAccess(mcMemPool_t memPool, const struct mcMemAccessDesc *descList, size_t count);
mcError_t wcudaMemPoolSetAttribute(mcMemPool_t memPool, enum mcMemPoolAttr attr, void *value);
mcError_t wcudaMemPoolTrimTo(mcMemPool_t memPool, size_t minBytesToKeep);

/**
 * @} SOMA
 */

/**
 *  @defgroup Unified Unified Addressing
 *  @{
 *  This section describes the Unified Addressing functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Managed-memory and pointer-attribute prototypes. Every function returns an
 * mcError_t status.
 */
/* In C++ flags defaults to mcMemAttachGlobal. */
mcError_t wcudaMallocManaged(void **devPtr, size_t size,
                             unsigned int flags __dv(mcMemAttachGlobal));
mcError_t wcudaMemAdvise(const void *devPtr, size_t count, mcMemoryAdvise_t advice, int device);
/* In C++ stream defaults to 0. */
mcError_t wcudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice,
                                mcStream_t stream __dv(0));
/* Single attribute: result goes into the dataSize-byte buffer at data. */
mcError_t wcudaMemRangeGetAttribute(void *data, size_t dataSize, mcMemRangeAttribute_t attribute,
                                    const void *devPtr, size_t count);
/* Multiple attributes: data, dataSizes and attributes are arrays; presumably numAttributes long each. */
mcError_t wcudaMemRangeGetAttributes(void **data, size_t *dataSizes,
                                     mcMemRangeAttribute_t *attributes, size_t numAttributes,
                                     const void *devPtr, size_t count);
mcError_t wcudaPointerGetAttributes(struct _mcPointerAttribute_t *attributes, const void *ptr);

/**
 * @} Unified
 */

/**
 *  @defgroup Peer Peer Device Memory Access
 *  @{
 *  This section describes the Peer Device Memory Access functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Peer access prototypes. The query writes an int through canAccessPeer for a
 * (device, peerDevice) pair; enable/disable take only the peer device.
 * NOTE(review): enable/disable presumably act on the current device - confirm
 * against the implementation.
 */
mcError_t wcudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice);
mcError_t wcudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags);
mcError_t wcudaDeviceDisablePeerAccess(int peerDevice);

/**
 * @} Peer
 */

/**
 *  @defgroup Version Version Management
 *  @{
 *  This section describes the Version Management functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Version queries: each writes an int through its pointer parameter and
 * returns an mcError_t status. The encoding of the version number is not
 * visible in this header.
 */
mcError_t wcudaDriverGetVersion(int *driverVersion);
mcError_t wcudaRuntimeGetVersion(int *runtimeVersion);

/**
 * @} Version
 */

/**
 *  @defgroup Profiler Profiler Control
 *  @{
 *  This section describes the Profiler Control functions of CUDA Runtime Wrapper API.
 *
 */
/*
 * Profiler control prototypes: initialization takes a config file path, an
 * output file path and an output mode; start/stop take no arguments.
 */
mcError_t wcudaProfilerInitialize(const char *configFile, const char *outputFile,
                                  enum mcOutputMode outputMode);
mcError_t wcudaProfilerStart(void);
mcError_t wcudaProfilerStop(void);

/**
 * @} Profiler
 */

/**
 *  @defgroup Interactions Interactions with the MACA Driver API
 *  @{
 *  This section describes the interactions between the MACA Driver API and the MACA Runtime API.
 */
/* Writes an mcFunction_t through functionPtr for the symbol given as an untyped const void *. */
mcError_t wcudaGetFuncBySymbol(mcFunction_t *functionPtr, const void *symbolPtr);

/**
 * @} Interactions
 */

/**
 *  @defgroup Driver Driver Entry Point Access
 *  @{
 *  This section describes the driver entry point access functions of MACA runtime application
 * programming interface.
 */

/* Looks up a symbol by name and writes a function pointer through funcPtr; flags is 64-bit. */
mcError_t wcudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags);

/**
 * @} Driver
 */
/* Graph functions. */
/*
 * Per-device graph memory attribute prototypes. Attribute values are
 * exchanged through an untyped void *; the pointee type presumably depends on
 * attr - confirm against the implementation.
 */
mcError_t wcudaDeviceGetGraphMemAttribute(int device, mcGraphMemAttributeType attr, void *value);

mcError_t wcudaDeviceGraphMemTrim(int device);

mcError_t wcudaDeviceSetGraphMemAttribute(int device, mcGraphMemAttributeType attr, void *value);

/*
 * Graph node creation prototypes. Except for wcudaGraphAddDependencies, they
 * all share the same four leading parameters:
 *   pGraphNode      - where the new node handle is written,
 *   graph           - the graph the node is added to,
 *   pDependencies   - const array of nodes,
 *   numDependencies - element count of pDependencies,
 * followed by node-specific arguments. Every function returns an mcError_t.
 */
mcError_t wcudaGraphAddChildGraphNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                      const mcGraphNode_t *pDependencies, size_t numDependencies,
                                      mcGraph_t childGraph);

/* Takes const from/to node pointers and a count instead of creating a node. */
mcError_t wcudaGraphAddDependencies(mcGraph_t graph, const mcGraphNode_t *from,
                                    const mcGraphNode_t *to, size_t numDependencies);

mcError_t wcudaGraphAddEmptyNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                 const mcGraphNode_t *pDependencies, size_t numDependencies);

/* Event record / wait nodes take the event handle as the node-specific argument. */
mcError_t wcudaGraphAddEventRecordNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                       const mcGraphNode_t *pDependencies, size_t numDependencies,
                                       mcEvent_t event);

mcError_t wcudaGraphAddEventWaitNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                     const mcGraphNode_t *pDependencies, size_t numDependencies,
                                     mcEvent_t event);

/* External semaphore signal / wait nodes take a const parameter structure. */
mcError_t wcudaGraphAddExternalSemaphoresSignalNode(
    mcGraphNode_t *pGraphNode, mcGraph_t graph, const mcGraphNode_t *pDependencies,
    size_t numDependencies, const mcExternalSemaphoreSignalNodeParams *nodeParams);

mcError_t
wcudaGraphAddExternalSemaphoresWaitNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                        const mcGraphNode_t *pDependencies, size_t numDependencies,
                                        const mcExternalSemaphoreWaitNodeParams *nodeParams);

mcError_t wcudaGraphAddHostNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                const mcGraphNode_t *pDependencies, size_t numDependencies,
                                const mcHostNodeParams *pNodeParams);

mcError_t wcudaGraphAddKernelNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                  const mcGraphNode_t *pDependencies, size_t numDependencies,
                                  const mcKernelNodeParams *pNodeParams);

/* nodeParams is non-const here, unlike the other parameter structures in this group. */
mcError_t wcudaGraphAddMemAllocNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                    const mcGraphNode_t *pDependencies, size_t numDependencies,
                                    struct mcMemAllocNodeParams *nodeParams);

mcError_t wcudaGraphAddMemFreeNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                   const mcGraphNode_t *pDependencies, size_t numDependencies,
                                   void *dptr);

/* Memcpy nodes: 3D parameter structure, flat 1D form, and from/to-symbol forms. */
mcError_t wcudaGraphAddMemcpyNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                  const mcGraphNode_t *pDependencies, size_t numDependencies,
                                  const struct mcMemcpy3DParms *pCopyParams);

mcError_t wcudaGraphAddMemcpyNode1D(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                    const mcGraphNode_t *pDependencies, size_t numDependencies,
                                    void *dst, const void *src, size_t count, mcMemcpyKind kind);

mcError_t wcudaGraphAddMemcpyNodeFromSymbol(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                            const mcGraphNode_t *pDependencies,
                                            size_t numDependencies, void *dst, const void *symbol,
                                            size_t count, size_t offset, mcMemcpyKind kind);

mcError_t wcudaGraphAddMemcpyNodeToSymbol(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                          const mcGraphNode_t *pDependencies,
                                          size_t numDependencies, const void *symbol,
                                          const void *src, size_t count, size_t offset,
                                          mcMemcpyKind kind);

mcError_t wcudaGraphAddMemsetNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                  const mcGraphNode_t *pDependencies, size_t numDependencies,
                                  const mcMemsetParams *pMemsetParams);

/*
 * Graph lifetime, debug output and event-node accessors. Every function
 * returns an mcError_t status; getters write through their trailing pointer.
 */
mcError_t wcudaGraphChildGraphNodeGetGraph(mcGraphNode_t node, mcGraph_t *pGraph);

mcError_t wcudaGraphClone(mcGraph_t *pGraphClone, mcGraph_t originalGraph);

mcError_t wcudaGraphCreate(mcGraph_t *pGraph, unsigned int flags);

/* Takes a file path and flags; the output format is presumably DOT, going by the name. */
mcError_t wcudaGraphDebugDotPrint(mcGraph_t graph, const char *path, unsigned int flags);

mcError_t wcudaGraphDestroy(mcGraph_t graph);

mcError_t wcudaGraphDestroyNode(mcGraphNode_t node);

/* Get/Set pairs for the event attached to event-record and event-wait nodes. */
mcError_t wcudaGraphEventRecordNodeGetEvent(mcGraphNode_t node, mcEvent_t *event_out);

mcError_t wcudaGraphEventRecordNodeSetEvent(mcGraphNode_t node, mcEvent_t event);

mcError_t wcudaGraphEventWaitNodeGetEvent(mcGraphNode_t node, mcEvent_t *event_out);

mcError_t wcudaGraphEventWaitNodeSetEvent(mcGraphNode_t node, mcEvent_t event);

/*
 * Executable-graph prototypes. The setters all take the executable graph
 * handle first and a node handle second, followed by the new parameters.
 * Every function returns an mcError_t status.
 */
mcError_t wcudaGraphExecChildGraphNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                                mcGraph_t childGraph);

mcError_t wcudaGraphExecDestroy(mcGraphExec_t graphExec);

mcError_t wcudaGraphExecEventRecordNodeSetEvent(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                                mcEvent_t event);

mcError_t wcudaGraphExecEventWaitNodeSetEvent(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                              mcEvent_t event);

mcError_t wcudaGraphExecExternalSemaphoresSignalNodeSetParams(
    mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
    const mcExternalSemaphoreSignalNodeParams *nodeParams);

mcError_t wcudaGraphExecExternalSemaphoresWaitNodeSetParams(
    mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
    const mcExternalSemaphoreWaitNodeParams *nodeParams);

mcError_t wcudaGraphExecHostNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                          const mcHostNodeParams *pNodeParams);

mcError_t wcudaGraphExecKernelNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                            const mcKernelNodeParams *pNodeParams);

/* Memcpy node setters: 3D parameter structure, flat 1D form, and from/to-symbol forms. */
mcError_t wcudaGraphExecMemcpyNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                            const struct mcMemcpy3DParms *pNodeParams);

mcError_t wcudaGraphExecMemcpyNodeSetParams1D(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                              void *dst, const void *src, size_t count,
                                              mcMemcpyKind kind);

mcError_t wcudaGraphExecMemcpyNodeSetParamsFromSymbol(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                                      void *dst, const void *symbol, size_t count,
                                                      size_t offset, mcMemcpyKind kind);

mcError_t wcudaGraphExecMemcpyNodeSetParamsToSymbol(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                                    const void *symbol, const void *src,
                                                    size_t count, size_t offset, mcMemcpyKind kind);

mcError_t wcudaGraphExecMemsetNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                            const mcMemsetParams *pNodeParams);

/* Takes an executable graph and a graph; writes an error node and an update result. */
mcError_t wcudaGraphExecUpdate(mcGraphExec_t hGraphExec, mcGraph_t hGraph,
                               mcGraphNode_t *hErrorNode_out,
                               mcGraphExecUpdateResult *updateResult_out);

/*
 * Node parameter accessors and graph topology queries. Getters write through
 * a non-const pointer; setters read from a const pointer. Every function
 * returns an mcError_t status.
 */
mcError_t
wcudaGraphExternalSemaphoresSignalNodeGetParams(mcGraphNode_t hNode,
                                                mcExternalSemaphoreSignalNodeParams *params_out);

mcError_t wcudaGraphExternalSemaphoresSignalNodeSetParams(
    mcGraphNode_t hNode, const mcExternalSemaphoreSignalNodeParams *nodeParams);

mcError_t
wcudaGraphExternalSemaphoresWaitNodeGetParams(mcGraphNode_t hNode,
                                              mcExternalSemaphoreWaitNodeParams *params_out);

mcError_t
wcudaGraphExternalSemaphoresWaitNodeSetParams(mcGraphNode_t hNode,
                                              const mcExternalSemaphoreWaitNodeParams *nodeParams);

/*
 * Topology queries: each takes caller-provided node array(s) plus a size_t
 * count pointer. NOTE(review): the count is presumably in/out (capacity in,
 * number written out) - confirm against the implementation.
 */
mcError_t wcudaGraphGetEdges(mcGraph_t graph, mcGraphNode_t *from, mcGraphNode_t *to,
                             size_t *numEdges);

mcError_t wcudaGraphGetNodes(mcGraph_t graph, mcGraphNode_t *nodes, size_t *numNodes);

mcError_t wcudaGraphGetRootNodes(mcGraph_t graph, mcGraphNode_t *pRootNodes, size_t *pNumRootNodes);

mcError_t wcudaGraphHostNodeGetParams(mcGraphNode_t node, mcHostNodeParams *pNodeParams);

mcError_t wcudaGraphHostNodeSetParams(mcGraphNode_t node, const mcHostNodeParams *pNodeParams);

/*
 * Graph instantiation prototypes: each writes an executable graph handle
 * through pGraphExec and returns an mcError_t status.
 */
/* Error-reporting form: takes an error node pointer and a log buffer of bufferSize bytes. */
mcError_t wcudaGraphInstantiate(mcGraphExec_t *pGraphExec, mcGraph_t graph,
                                mcGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize);

/* Flags form: flags is 64-bit and has no __dv default, so it must always be passed. */
mcError_t wcudaGraphInstantiateWithFlags(mcGraphExec_t *pGraphExec, mcGraph_t graph,
                                         unsigned long long flags);

/* Parameter-structure form: instantiateParams is non-const, so it may also carry outputs. */
mcError_t wcudaGraphInstantiateWithParams(mcGraphExec_t *pGraphExec, mcGraph_t graph,
                                          mcGraphInstantiateParams *instantiateParams);

mcError_t wcudaGraphExecGetFlags(mcGraphExec_t graphExec, unsigned long long *flags);

/*
 * Kernel-node attribute and parameter accessors. All return mcError_t and take node
 * handles by value.
 *
 * NOTE(review): semantics inferred from names paralleling cudaGraphKernelNode*; confirm
 * against the implementation.
 */

/** Takes a source and a destination node handle (`hSrc`, `hDst`), both by value. */
mcError_t wcudaGraphKernelNodeCopyAttributes(mcGraphNode_t hSrc, mcGraphNode_t hDst);

/** Attribute getter: `attr` selects the attribute; `value_out` is a non-const pointer. */
mcError_t wcudaGraphKernelNodeGetAttribute(mcGraphNode_t hNode, mcKernelNodeAttrID attr,
                                           mcKernelNodeAttrValue *value_out);

/** Parameter getter: `pNodeParams` is a non-const pointer to mcKernelNodeParams. */
mcError_t wcudaGraphKernelNodeGetParams(mcGraphNode_t node, mcKernelNodeParams *pNodeParams);

/** Attribute setter: `value` is a pointer to const; not writable through this pointer. */
mcError_t wcudaGraphKernelNodeSetAttribute(mcGraphNode_t hNode, mcKernelNodeAttrID attr,
                                           const mcKernelNodeAttrValue *value);

/** Parameter setter: `pNodeParams` is a pointer to const mcKernelNodeParams. */
mcError_t wcudaGraphKernelNodeSetParams(mcGraphNode_t node, const mcKernelNodeParams *pNodeParams);

/**
 * Graph launch entry point.
 *
 * @param graphExec Executable-graph handle, passed by value.
 * @param stream    Stream handle, passed by value; no default argument is declared.
 * @return mcError_t.
 *
 * NOTE(review): presumably enqueues the executable graph on `stream` as cudaGraphLaunch
 * does; whether the call is asynchronous is not visible in this header.
 */
mcError_t wcudaGraphLaunch(mcGraphExec_t graphExec, mcStream_t stream);

/**
 * Memory-allocation node parameter getter.
 *
 * @param node       Graph-node handle, passed by value.
 * @param params_out Non-const pointer to struct mcMemAllocNodeParams (output by naming).
 * @return mcError_t.
 */
mcError_t wcudaGraphMemAllocNodeGetParams(mcGraphNode_t node,
                                          struct mcMemAllocNodeParams *params_out);

/**
 * Memory-free node parameter getter.
 *
 * @param node     Graph-node handle, passed by value.
 * @param dptr_out Untyped, single-level `void *` output.
 * @return mcError_t.
 *
 * NOTE(review): the signature matches cudaGraphMemFreeNodeGetParams, where the caller
 * passes the address of a pointer variable that receives the freed device address --
 * presumably the same here; confirm against the implementation.
 */
mcError_t wcudaGraphMemFreeNodeGetParams(mcGraphNode_t node, void *dptr_out);

/*
 * Memcpy-node parameter accessors. All take the node handle by value and return mcError_t.
 *
 * NOTE(review): semantics inferred from names paralleling cudaGraphMemcpyNode*; in
 * particular, `count` and `offset` are presumably byte quantities -- confirm against the
 * implementation.
 */

/** Getter: `pNodeParams` is a non-const pointer to struct mcMemcpy3DParms. */
mcError_t wcudaGraphMemcpyNodeGetParams(mcGraphNode_t node, struct mcMemcpy3DParms *pNodeParams);

/** Setter: `pNodeParams` is a pointer to const struct mcMemcpy3DParms. */
mcError_t wcudaGraphMemcpyNodeSetParams(mcGraphNode_t node,
                                        const struct mcMemcpy3DParms *pNodeParams);

/** 1D setter: writable `dst`, read-only `src`, a `count`, and a copy `kind`. */
mcError_t wcudaGraphMemcpyNodeSetParams1D(mcGraphNode_t node, void *dst, const void *src,
                                          size_t count, mcMemcpyKind kind);

/** From-symbol setter: writable `dst`; the source is `symbol` plus an `offset`. */
mcError_t wcudaGraphMemcpyNodeSetParamsFromSymbol(mcGraphNode_t node, void *dst, const void *symbol,
                                                  size_t count, size_t offset, mcMemcpyKind kind);

/**
 * To-symbol setter: read-only `src`; the destination is `symbol` plus an `offset`.
 * Note that `symbol` is declared `const void *` even though it names the destination.
 */
mcError_t wcudaGraphMemcpyNodeSetParamsToSymbol(mcGraphNode_t node, const void *symbol,
                                                const void *src, size_t count, size_t offset,
                                                mcMemcpyKind kind);

/**
 * Memset-node parameter getter.
 *
 * @param node        Graph-node handle, passed by value.
 * @param pNodeParams Non-const pointer to mcMemsetParams (presumably filled in on return).
 * @return mcError_t.
 */
mcError_t wcudaGraphMemsetNodeGetParams(mcGraphNode_t node, mcMemsetParams *pNodeParams);

/**
 * Memset-node parameter setter.
 *
 * @param node        Graph-node handle, passed by value.
 * @param pNodeParams Pointer to const mcMemsetParams; not writable through this pointer.
 * @return mcError_t.
 */
mcError_t wcudaGraphMemsetNodeSetParams(mcGraphNode_t node, const mcMemsetParams *pNodeParams);

/*
 * Per-node queries and enable/disable controls. All return mcError_t.
 *
 * NOTE(review): semantics inferred from names paralleling cudaGraphNode*; confirm against
 * the implementation.
 */

/**
 * Clone lookup: `pNode` is a non-const output pointer; `originalNode` and `clonedGraph`
 * are passed by value.
 */
mcError_t wcudaGraphNodeFindInClone(mcGraphNode_t *pNode, mcGraphNode_t originalNode,
                                    mcGraph_t clonedGraph);

/**
 * Dependency query: `pDependencies` is a non-const node array and `pNumDependencies` a
 * non-const count pointer (presumably in/out, as in the CUDA counterpart).
 */
mcError_t wcudaGraphNodeGetDependencies(mcGraphNode_t node, mcGraphNode_t *pDependencies,
                                        size_t *pNumDependencies);

/**
 * Dependent-node query: `pDependentNodes` is a non-const node array and
 * `pNumDependentNodes` a non-const count pointer (presumably in/out).
 */
mcError_t wcudaGraphNodeGetDependentNodes(mcGraphNode_t node, mcGraphNode_t *pDependentNodes,
                                          size_t *pNumDependentNodes);

/** Enabled-state getter for a node within an executable graph; `isEnabled` is an output. */
mcError_t wcudaGraphNodeGetEnabled(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                   unsigned int *isEnabled);

/** Node-type getter: `pType` is a non-const pointer to mcGraphNodeType. */
mcError_t wcudaGraphNodeGetType(mcGraphNode_t node, mcGraphNodeType *pType);

/** Enabled-state setter for a node within an executable graph; `isEnabled` is by value. */
mcError_t wcudaGraphNodeSetEnabled(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                   unsigned int isEnabled);

/*
 * User-object reference management plus two graph-level calls. All return mcError_t.
 *
 * The `__dv(x)` macro defined at the top of this header expands to `= x` when compiled as
 * C++ and to nothing in C, so the defaults noted below exist only for C++ callers; C
 * callers must pass every argument explicitly.
 *
 * NOTE(review): semantics inferred from names paralleling the CUDA runtime's user-object
 * API; confirm against the implementation.
 */

/** Graph-side release of a user object; `count` defaults to 1 in C++. */
mcError_t wcudaGraphReleaseUserObject(mcGraph_t graph, mcUserObject_t object,
                                      unsigned int count __dv(1));

/**
 * Dependency removal: `from` and `to` are parallel arrays of const node handles, with
 * `numDependencies` presumably giving the length of both.
 */
mcError_t wcudaGraphRemoveDependencies(mcGraph_t graph, const mcGraphNode_t *from,
                                       const mcGraphNode_t *to, size_t numDependencies);

/** Graph-side retain of a user object; in C++ `count` defaults to 1 and `flags` to 0. */
mcError_t wcudaGraphRetainUserObject(mcGraph_t graph, mcUserObject_t object,
                                     unsigned int count __dv(1), unsigned int flags __dv(0));

/** Takes an executable-graph handle and a stream handle, both by value. */
mcError_t wcudaGraphUpload(mcGraphExec_t graphExec, mcStream_t stream);

/**
 * User-object creation: `object_out` is a non-const output pointer; `ptr` is an opaque
 * pointer paired with the `destroy` host callback; `initialRefcount` and `flags` have no
 * declared defaults.
 */
mcError_t wcudaUserObjectCreate(mcUserObject_t *object_out, void *ptr, mcHostFn_t destroy,
                                unsigned int initialRefcount, unsigned int flags);

/** User-object release; `count` defaults to 1 in C++. */
mcError_t wcudaUserObjectRelease(mcUserObject_t object, unsigned int count __dv(1));

/** User-object retain; `count` defaults to 1 in C++. */
mcError_t wcudaUserObjectRetain(mcUserObject_t object, unsigned int count __dv(1));

/**
 * @} Driver
 */

/**
 *  @defgroup External External Resource Interoperability
 *  @{
 *  This section describes the external resource interoperability functions of the MACA runtime
 * application programming interface
 */
/*
 * External semaphore / external memory declarations. All return mcError_t.
 *
 * NOTE(review): semantics inferred from names paralleling the CUDA runtime's external
 * resource interoperability API; confirm against the implementation.
 */

/** Import: `extSem_out` is a non-const output pointer; `semHandleDesc` is read-only. */
mcError_t wcudaImportExternalSemaphore(mcExternalSemaphore_t *extSem_out,
                                       const mcExternalSemaphoreHandleDesc *semHandleDesc);
/** Destroy: takes the external-semaphore handle by value. */
mcError_t wcudaDestroyExternalSemaphore(mcExternalSemaphore_t extSem);
/**
 * Signal: `extSemArray` and `paramsArray` are read-only arrays, with `numExtSems`
 * presumably giving the length of both; `stream` has no declared default.
 */
mcError_t wcudaSignalExternalSemaphoresAsync(const mcExternalSemaphore_t *extSemArray,
                                             const mcExternalSemaphoreSignalParams *paramsArray,
                                             unsigned int numExtSems, mcStream_t stream);
/**
 * Wait: `extSemArray` and `paramsArray` are read-only arrays, with `numExtSems`
 * presumably giving the length of both; `stream` has no declared default.
 */
mcError_t wcudaWaitExternalSemaphoresAsync(const mcExternalSemaphore_t *extSemArray,
                                           const mcExternalSemaphoreWaitParams *paramsArray,
                                           unsigned int numExtSems, mcStream_t stream);
/** Import: `extMem_out` is a non-const output pointer; `memHandleDesc` is read-only. */
mcError_t wcudaImportExternalMemory(mcExternalMemory_t *extMem_out,
                                    const mcExternalMemoryHandleDesc *memHandleDesc);
/** Destroy: takes the external-memory handle by value. */
mcError_t wcudaDestroyExternalMemory(mcExternalMemory_t extMem);
/** Buffer mapping: `devPtr` is a `void **` output; `bufferDesc` is read-only. */
mcError_t wcudaExternalMemoryGetMappedBuffer(void **devPtr, mcExternalMemory_t extMem,
                                             const mcExternalMemoryBufferDesc *bufferDesc);
/** Mipmapped-array mapping: `mipmap` is a non-const output; `mipmapDesc` is read-only. */
mcError_t
wcudaExternalMemoryGetMappedMipmappedArray(mcMipmappedArray_t *mipmap, mcExternalMemory_t extMem,
                                           const mcExternalMemoryMipmappedArrayDesc *mipmapDesc);
/**
 * @} External
 */

/**
 * @defgroup INTEROP Graphics Interoperability
 *
 * This section describes the graphics interoperability functions of the MACA
 * runtime application programming interface.
 *
 * @{
 */

/**
 * Map call: `resources` is a non-const array of graphics-resource handles, with `count`
 * presumably giving its length. `stream` defaults to 0 in C++ only (`__dv` expands to
 * nothing in C, so C callers must pass it).
 */
mcError_t wcudaGraphicsMapResources(int count, mcGraphicsResource_t *resources,
                                    mcStream_t stream __dv(0));
/** `mipmappedArray` is a non-const output pointer; `resource` is passed by value. */
mcError_t wcudaGraphicsResourceGetMappedMipmappedArray(mcMipmappedArray_t *mipmappedArray,
                                                       mcGraphicsResource_t resource);
/** `devPtr` (`void **`) and `size` are non-const output pointers. */
mcError_t wcudaGraphicsResourceGetMappedPointer(void **devPtr, size_t *size,
                                                mcGraphicsResource_t resource);
/** Takes the resource handle and an unsigned `flags` value, both by value. */
mcError_t wcudaGraphicsResourceSetMapFlags(mcGraphicsResource_t resource, unsigned int flags);
/** `array` is a non-const output pointer, selected by `arrayIndex` and `mipLevel`. */
mcError_t wcudaGraphicsSubResourceGetMappedArray(mcArray_t *array, mcGraphicsResource_t resource,
                                                 unsigned int arrayIndex, unsigned int mipLevel);
/**
 * Unmap call with the same parameter shape as wcudaGraphicsMapResources; `stream`
 * defaults to 0 in C++ only.
 */
mcError_t wcudaGraphicsUnmapResources(int count, mcGraphicsResource_t *resources,
                                      mcStream_t stream __dv(0));
/** Takes the graphics-resource handle by value. */
mcError_t wcudaGraphicsUnregisterResource(mcGraphicsResource_t resource);

/**
 * @} INTEROP
 */

/**
 * @defgroup TEXTURE_OBJECT Texture Object Management
 *
 * This section describes the low level texture object management functions
 * of the MACA runtime application programming interface.
 *
 * @{
 */
/** Channel-descriptor query: `desc` is a non-const output; `array` is a const array handle. */
mcError_t wcudaGetChannelDesc(mcChannelFormatDesc *desc, mcArray_const_t array);

/**
 * Builds a channel-format descriptor from four int components and a format kind.
 *
 * Unlike the other declarations in this group, this returns the mcChannelFormatDesc by
 * value rather than an mcError_t, so no error status is reported to the caller.
 *
 * NOTE(review): `x`, `y`, `z`, `w` are presumably per-channel bit widths, as in
 * cudaCreateChannelDesc; confirm against the implementation.
 */
mcChannelFormatDesc wcudaCreateChannelDesc(int x, int y, int z, int w, enum mcChannelFormatKind_enum f);

/**
 * Texture-object creation.
 *
 * @param pTexObject   Non-const output pointer for the texture-object handle.
 * @param pResDesc     Read-only resource descriptor.
 * @param pTexDesc     Read-only texture descriptor.
 * @param pResViewDesc Read-only resource-view descriptor (whether NULL is accepted is not
 *                     visible here).
 * @return mcError_t.
 */
mcError_t wcudaCreateTextureObject(mcTextureObject_t *pTexObject,
                                   const struct mcResourceDesc *pResDesc,
                                   const struct mcTextureDesc *pTexDesc,
                                   const struct mcResourceViewDesc *pResViewDesc);

/** Takes the texture-object handle by value. */
mcError_t wcudaDestroyTextureObject(mcTextureObject_t texObject);

/** Resource-descriptor getter: `pResDesc` is a non-const output pointer. */
mcError_t wcudaGetTextureObjectResourceDesc(struct mcResourceDesc *pResDesc,
                                            mcTextureObject_t texObject);

/** Texture-descriptor getter: `pTexDesc` is a non-const output pointer. */
mcError_t wcudaGetTextureObjectTextureDesc(struct mcTextureDesc *pTexDesc,
                                           mcTextureObject_t texObject);

/** Resource-view-descriptor getter: `pResViewDesc` is a non-const output pointer. */
mcError_t wcudaGetTextureObjectResourceViewDesc(struct mcResourceViewDesc *pResViewDesc,
                                                mcTextureObject_t texObject);

/**
 * @} TEXTURE_OBJECT
 */

/**
 * @defgroup TEXTURE Texture Reference Management [DEPRECATED]
 *
 * This section describes the low level texture reference management functions
 * of the MACA runtime application programming interface.
 *
 * @{
 */

/*
 * Texture-reference declarations. Every entry carries a `TODO:wcuda_skipped` marker;
 * NOTE(review): this presumably means the wrapper does not implement these yet -- confirm
 * before relying on them. All return mcError_t and take the texture reference as a
 * pointer to const.
 */

/* TODO:wcuda_skipped - for Texture Reference Management */
/**
 * Linear-memory bind: `offset` is a non-const output pointer; `devPtr` and `desc` are
 * read-only. `size` defaults to UINT_MAX in C++ only (`__dv` expands to nothing in C, so
 * C callers must pass it explicitly).
 */
mcError_t wcudaBindTexture(size_t *offset, const struct textureReference *texref,
                           const void *devPtr, const mcChannelFormatDesc *desc,
                           size_t size __dv(UINT_MAX));
/* TODO:wcuda_skipped - for Texture Reference Management */
/** 2D bind: as above, with explicit `width`, `height` and `pitch` and no defaults. */
mcError_t wcudaBindTexture2D(size_t *offset, const struct textureReference *texref,
                             const void *devPtr, const mcChannelFormatDesc *desc, size_t width,
                             size_t height, size_t pitch);
/* TODO:wcuda_skipped - for Texture Reference Management */
/** Array bind: `array` is a const array handle; `desc` is read-only. */
mcError_t wcudaBindTextureToArray(const struct textureReference *texref, mcArray_const_t array,
                                  const mcChannelFormatDesc *desc);
/* TODO:wcuda_skipped - for Texture Reference Management */
/** Mipmapped-array bind: `mipmappedArray` is a const handle; `desc` is read-only. */
mcError_t wcudaBindTextureToMipmappedArray(const struct textureReference *texref,
                                           mcMipmappedArray_const_t mipmappedArray,
                                           const mcChannelFormatDesc *desc);
/* TODO:wcuda_skipped - for Texture Reference Management */
/** Takes only the texture reference. */
mcError_t wcudaUnbindTexture(const struct textureReference *texref);
/* TODO:wcuda_skipped - for Texture Reference Management */
/** `offset` is a non-const output pointer. */
mcError_t wcudaGetTextureAlignmentOffset(size_t *offset, const struct textureReference *texref);

/* TODO:wcuda_skipped - for Texture Reference Management */
/** `texref` is an output pointer-to-pointer; `symbol` is an opaque read-only pointer. */
mcError_t wcudaGetTextureReference(const struct textureReference **texref, const void *symbol);

/* TODO:wcuda_skipped - for Texture Reference Management */
/**
 * `levelArray` is a non-const output pointer selected by `level`.
 * NOTE(review): this declaration takes no textureReference, so the "Texture Reference
 * Management" label above may be a copy-paste; confirm which group it belongs to.
 */
mcError_t wcudaGetMipmappedArrayLevel(mcArray_t *levelArray,
                                      mcMipmappedArray_const_t mipmappedArray, unsigned int level);

/**
 * @} TEXTURE
 */

/**
 * @defgroup SURFACE Surface Reference Management [DEPRECATED]
 *
 * This section describes the low level surface reference management functions
 * of the MACA runtime application programming interface.
 *
 * @{
 */

/*
 * Surface declarations. Every entry carries a `TODO:wcuda_skipped` marker;
 * NOTE(review): this presumably means the wrapper does not implement these yet -- confirm
 * before relying on them. All return mcError_t.
 *
 * NOTE(review): the last three declarations operate on mcSurfaceObject_t rather than
 * struct surfaceReference, so their "Surface Reference Management" labels look like
 * copy-paste; confirm the intended grouping.
 */

/* TODO:wcuda_skipped - for Surface Reference Management */
/** Array bind: `surfref` and `desc` are read-only; `array` is a const array handle. */
mcError_t wcudaBindSurfaceToArray(const struct surfaceReference *surfref, mcArray_const_t array,
                                  const mcChannelFormatDesc *desc);

/* TODO:wcuda_skipped - for Surface Reference Management */
/** `surfref` is an output pointer-to-pointer; `symbol` is an opaque read-only pointer. */
mcError_t wcudaGetSurfaceReference(const struct surfaceReference **surfref, const void *symbol);

/* TODO:wcuda_skipped - for Surface Reference Management */
/** Surface-object creation: `pSurfObject` is a non-const output; `pResDesc` is read-only. */
mcError_t wcudaCreateSurfaceObject(mcSurfaceObject_t *pSurfObject,
                                   const struct mcResourceDesc *pResDesc);

/* TODO:wcuda_skipped - for Surface Reference Management */
/** Takes the surface-object handle by value. */
mcError_t wcudaDestroySurfaceObject(mcSurfaceObject_t surfObject);

/* TODO:wcuda_skipped - for Surface Reference Management */
/** Resource-descriptor getter: `pResDesc` is a non-const output pointer. */
mcError_t wcudaGetSurfaceObjectResourceDesc(struct mcResourceDesc *pResDesc,
                                            mcSurfaceObject_t surfObject);
/** @} */ /* END SURFACE */

/**
 * Export-table lookup.
 *
 * @param ppExportTable  Output pointer-to-pointer; the table itself is exposed as const.
 * @param pExportTableId Read-only pointer to the mcUuid_t identifying the table.
 * @return mcError_t.
 *
 * NOTE(review): presumably mirrors the CUDA runtime's undocumented cudaGetExportTable;
 * the set of valid IDs and the table layout are not visible in this header.
 */
mcError_t wcudaGetExportTable(const void **ppExportTable, const mcUuid_t *pExportTableId);

#ifdef __cplusplus
} /* extern "C" */
#endif

/*
 * C++-only declarations. They sit after the closing brace of the extern "C" block, so
 * they have C++ linkage and are not visible to C translation units.
 *
 * NOTE(review): these are presumably overloads of same-named C-linkage declarations
 * earlier in this header (mirroring the C++ overloads in CUDA's cuda_runtime.h), each
 * adding trailing parameters; confirm against the rest of the file.
 */
#ifdef __cplusplus
/** Event creation with an explicit `flags` argument; `event` is an output pointer. */
mcError_t wcudaEventCreate(mcEvent_t *event, unsigned int flags);
/** Host allocation with an explicit `flags` argument; `ptr` is a `void **` output. */
mcError_t wcudaMallocHost(void **ptr, size_t size, unsigned int flags);
/** Stream-ordered allocation taking an explicit memory pool in addition to the stream. */
mcError_t wcudaMallocAsync(void **devPtr, size_t size, mcMemPool_t memPool, mcStream_t stream);
#endif

/*
 * @defgroup cuda runtime template C++ wrapper
 *
 * Perform automatic type conversion to eliminate the need for excessive typecasting (i.e. void**)
 *
 * __MC_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these
 * wrappers. It is useful for applications which need to obtain decltypes of
 * CUDA runtime APIs.
 *
 */
#if defined(__cplusplus) && !defined(__MC_DISABLE_CPP_FUNCTIONS__)
#include "cuda_channel_descriptor_wrapper.h"
#include "cuda_runtime_template_wrapper.h"
#endif

/**
 * @} wcudaRuntime
 */

#endif
